# 京东爬取笔记本评论第一个商品的550条评论
import selenium
from selenium.common import exceptions
from urllib3 import encode_multipart_formdata
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# 处理html 以使用xpath来解析html
from lxml import etree
from pywinauto import Desktop
from selenium.common.exceptions import NoSuchElementException
import time
from selenium.webdriver.chrome.options import Options
import pandas as pd
import xlwt

# Launch a Chrome browser instance; shared module-wide by go_jd() below.
browser = webdriver.Chrome()


def go_jd():
    """Interactively scrape product reviews from JD.com.

    Repeatedly: prompts the user for a search keyword, opens the first
    product in the search results, clicks into the review ("商品评价")
    tab and collects up to 550 ``[user, comment]`` pairs (10 per page,
    paginating as needed), then saves them to ``<keyword>评论信息.xls``
    via :func:`write_excel`.  Loops until the user answers ``0`` at the
    continue prompt.

    Side effects: drives the module-level ``browser`` WebDriver and
    writes an .xls file per keyword.  Returns ``None``.
    """
    while 1:
        # Open the JD home page.
        jd_index_url = r'https://www.jd.com/'
        browser.get(jd_index_url)
        time.sleep(1)
        # Locate the search box and type the keyword.
        key = input("请输入关键字")
        browser.find_element(By.XPATH, r"//*[@id='key']").send_keys(key)

        time.sleep(1)
        # Submit the search.
        browser.find_element(
            By.XPATH, r"//*[@id='search']/div/div[2]/button").click()
        time.sleep(1)
        # BUGFIX: data_info must exist before the try block; otherwise an
        # early failure (e.g. no search results) made the except handler
        # raise NameError at the write_excel call.
        data_info = []
        try:
            # Read the SKU of the first result and build its item-page URL.
            data_sku = browser.find_element(
                By.XPATH,
                r"//*[@id='J_goodsList']/ul/li[1]").get_attribute("data-sku")
            good_page_url = r"https://item.jd.com/{0}.html".format(data_sku)
            # Open the product page.
            browser.get(good_page_url)
            time.sleep(3)
            # Scroll down so the lazily-loaded review tab is rendered.
            browser.execute_script(
                "document.documentElement.scrollTop=1000")
            time.sleep(6)
            # Click the "商品评价" (product reviews) tab.
            browser.find_element(
                By.XPATH, '//li[contains(text(), "商品评价")]').click()

            # Each review page shows 10 comments (div[1]..div[10] under
            # #comment-0), so advance to the next page after every 10.
            time.sleep(5)
            comment_num = 1  # 1-based position of the comment on the page
            page_index = 1
            # BUGFIX: was `<= 550`, which collected 551 comments.
            while len(data_info) < 550:
                # Paginate once the 10 comments of this page are consumed.
                if comment_num > 10:
                    time.sleep(3)
                    # Renamed from `next` to avoid shadowing the builtin;
                    # JS click avoids "element not interactable" issues.
                    next_btn = browser.find_element(
                        By.CSS_SELECTOR,
                        r"#comment-0 > div.com-table-footer > div > div > a.ui-pager-next")
                    browser.execute_script('arguments[0].click();', next_btn)
                    page_index += 1
                    comment_num = 1
                    time.sleep(2)
                comment_content = browser.find_element(
                    By.XPATH,
                    r"//*[@id='comment-0']/div[{0}]/div[2]/p".format(comment_num)).text
                user = browser.find_element(
                    By.XPATH,
                    r"//*[@id='comment-0']/div[{0}]/div[1]/div[1]".format(comment_num)).text
                data_info.append([user, comment_content])
                print("第" + str(comment_num) + " \n个")
                comment_num += 1
                print("爬取成功第 评论为 " + str(comment_content) + " ] \n")
            # Persist everything collected for this keyword.
            write_excel(data_info, "{0}评论信息".format(key))
        except Exception as e:
            print(e)
            print("出错了")
            # Save whatever was collected before the error (best-effort).
            write_excel(data_info, "{0}评论信息".format(key))
        # BUGFIX: the original used print() (which returns None), so the
        # comparison below was always False and the loop never exited;
        # the {0} placeholder was also never filled in.  Ask via input()
        # and compare against the string "0" (input returns str).
        result = input("{0}写入完毕，是否还要继续读取 1 继续 0 退出".format(key))
        if result == "0":
            print("Bye! ~")
            break


def write_excel(data, txt_name):
    """Save scraped review rows to ``<txt_name>.xls``.

    Args:
        data: sequence of rows, each a ``[user, comment]`` pair (any
            sequence of cell values works; cells are written in order).
        txt_name: output file name, without the ``.xls`` extension.

    Side effects: creates/overwrites ``<txt_name>.xls`` in the current
    working directory.  Returns ``None``.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("Sheet")

    # BUGFIX: the original skipped row 0 for a header ("# 标题") but never
    # wrote one, leaving a blank first row; write the column titles here.
    sheet.write(0, 0, "用户")
    sheet.write(0, 1, "评论")

    # Data rows start at row 1, below the header.
    for i, item in enumerate(data):
        for j, sub in enumerate(item):
            sheet.write(i + 1, j, sub)

    workbook.save(txt_name + ".xls")


if __name__ == '__main__':
    # Entry point: start the interactive scraping loop.
    go_jd()

